slope <- map_dfr(rigal_trends,
  ~.x %>%
  select(siteid, linear_slope),
.id = "response"
)
tar_load(rigal_slp_df)
slope_df <- rigal_slp_df

1 Summary

summary_slope <- slope %>%
  group_by(response) %>%
  summarise(summ = list(enframe(summary_distribution(linear_slope, na.rm = TRUE)))) %>%
  unnest(cols = summ) %>%
  pivot_wider(names_from = "name", values_from = "value")
summary_slope %>%
  mutate(response = get_var_replacement()[response]) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "bordered", "hover")) %>%
  scroll_box(width = "100%", height = "400px")
response min 1st_quart median 2nd_quart max mean sd n n_na frac_na
Appearance -0.0384615 0.0000000 0.0053644 0.0133333 0.0986395 0.0084877 0.0122101 5516 0 0
Chao (binary, similarity) -0.1209148 -0.0124542 0.0000000 0.0000000 0.0748988 -0.0080145 0.0165902 5516 0 0
Chao evenness -1.2609304 -0.0247924 0.0000000 0.0282298 2.1442931 0.0021943 0.0869975 5516 0 0
Chao species richness -2.8250079 -0.0435423 0.0027879 0.0694623 2.2999419 0.0160926 0.1864729 5516 0 0
Chao shannon -1.0688101 -0.0337486 0.0033085 0.0500790 0.8412016 0.0109086 0.1104958 5516 0 0
Chao simpson -2.2048429 -0.0294179 0.0026013 0.0440541 0.7745609 0.0077291 0.0994345 5516 0 0
Disappearance -0.0300000 0.0000000 0.0030656 0.0097046 0.0651261 0.0060655 0.0093288 5516 0 0
Evenness -0.1006653 -0.0074240 0.0000280 0.0085548 0.1019762 0.0010409 0.0185918 5516 0 0
SER_a (rel abundance) -0.1258587 -0.0296959 -0.0134346 -0.0016327 0.0634554 -0.0184716 0.0216062 5516 0 0
Horn (binary, similarity) -0.1179941 -0.0192192 -0.0104115 -0.0035745 0.0421363 -0.0131735 0.0142384 5516 0 0
Jaccard (binary, similarity) -0.1209148 -0.0268493 -0.0152704 -0.0059477 0.0384615 -0.0180388 0.0171575 5516 0 0
Jaccard (binary, dissimilarity) -0.0384615 0.0059477 0.0152704 0.0268493 0.1209148 0.0180388 0.0171575 5516 0 0
Log species richness -20.6068500 -1.0398436 0.1305570 1.7745696 43.8907431 0.5690175 3.5608475 5516 0 0
Log total abundance -66.9185877 -3.4233489 0.5582744 4.8750841 80.6903559 1.2164360 8.8216646 5516 0 0
Nestedness (jaccard) -0.0577731 0.0000000 0.0053424 0.0147321 0.1377152 0.0090678 0.0154355 5516 0 0
Shannon -0.2007711 -0.0106280 0.0008848 0.0163682 0.2212985 0.0031355 0.0287062 5516 0 0
Simpson -0.0829149 -0.0055055 0.0002939 0.0073310 0.0921231 0.0013521 0.0142783 5516 0 0
Species richness -1.6727273 -0.0419836 0.0038939 0.0729567 2.0714286 0.0233072 0.1763604 5516 0 0
Total turnover (codyn) -0.0384615 0.0041152 0.0119597 0.0220058 0.0986395 0.0145533 0.0147505 5516 0 0
Total abundance -1726.0511201 -1.7400804 0.0955316 2.3416650 1027.3686567 2.2395645 42.9647716 5516 0 0
Turnover (jaccard) -0.0769231 0.0000000 0.0022039 0.0156534 0.1512605 0.0089710 0.0161379 5516 0 0
bs <- map(var_temporal_trends, ~summary_distribution(slope_df[[.x]]))
names(bs) <- var_temporal_trends
  • Jaccard trends: -0.018 (0.017 s.d.) (-0.01 in @dornelas_assemblage_2014).
    • meaning -0.18 % change per decade in average
    • Jaccard decreased in 89% of the sites (79% in @dornelas_assemblage_2014)
  • Richness trends:
    • +0.6% richness change per year (3.6 s.d.), i.e. 6% per decade.

5 Comparison rigal, lm

rigal_trends_df <-  map2_dfr(
  rigal_trends, names(rigal_trends),
  ~.x %>% mutate(variable = .y)
  ) %>%
  select(variable, siteid, linear_slope) %>%
  pivot_wider(names_from = "variable", values_from = "linear_slope") %>%
  mutate(type = "rigal")
slope_comp <- list()
slope_comp$rigal <- rigal_trends_df[order(rigal_trends_df$siteid), ]
slope_comp$simple_lm <- slope_df[order(slope_df$siteid), ]
stopifnot(all(slope_comp$rigal$siteid == slope_comp$simple_lm$siteid))
slope_comp$diff <- tibble(
  siteid = slope_comp$rigal$siteid)
for (i in var_temporal_trends) {
  slope_comp$diff[[i]] <- slope_comp$rigal[[i]] - slope_comp$simple_lm[[i]]
}
  • All good !
old_par <- par()
par(mfrow = c(3, 2))
for (i in var_temporal_trends) {
  plot(
    slope_comp$rigal[[i]] ~
      slope_comp$simple_lm[[i]],
    ylab = "Rigal (polynomial degree 2)",
    xlab = "Simple LM"
  )
  abline(0, 1)
  title(i)
}

par(old_par)
#> Warning in par(old_par): graphical parameter "cin" cannot be set
#> Warning in par(old_par): graphical parameter "cra" cannot be set
#> Warning in par(old_par): graphical parameter "csi" cannot be set
#> Warning in par(old_par): graphical parameter "cxy" cannot be set
#> Warning in par(old_par): graphical parameter "din" cannot be set
#> Warning in par(old_par): graphical parameter "page" cannot be set

# Compare Rigal and simple lm for few sites
plot_rigal_raw_data <- function(
  site = NULL,
  response = NULL,
  dataset = NULL,
  rigal_trends = NULL
  ) {

  raw_data <- dataset %>%
    filter(siteid == site)
  rigal_resp <- rigal_trends[[response]]
  rigal_resp_site <- rigal_resp[rigal_resp$siteid == site, ]
  plot(raw_data[[response]]~raw_data[["year"]], ylab = response, xlab = "Year")
  abline(rigal_resp_site$intercept, rigal_resp_site$linear_slope, col
  = "red")
  abline(lm(raw_data[[response]]~raw_data[["year"]]), col = "green")
  title(main = site)
}
plot_rigal_raw_data(site = "S2672", response = "log_total_abundance", dataset =
analysis_dataset, rigal_trends = rigal_trends)

slope_comp$diff <- slope_comp$diff %>%
  arrange(desc(abs(log_total_abundance)))
old_par <- par()
par(mfrow = c(3, 2))
for (i in slope_comp$diff[1:6,]$siteid) {
  plot_rigal_raw_data(
    site = i,
    response = "log_total_abundance",
    dataset = analysis_dataset,
    rigal_trends = rigal_trends
  )
}

par(old_par)
#> Warning in par(old_par): graphical parameter "cin" cannot be set
#> Warning in par(old_par): graphical parameter "cra" cannot be set
#> Warning in par(old_par): graphical parameter "csi" cannot be set
#> Warning in par(old_par): graphical parameter "cxy" cannot be set
#> Warning in par(old_par): graphical parameter "din" cannot be set
#> Warning in par(old_par): graphical parameter "page" cannot be set

9 Environment

tar_load(c(wt_mv_avg, riveratlas_site))
col_to_keep <- get_river_atlas_significant_var()
names(col_to_keep) <- NULL

riv <- riveratlas_site %>%
  clean_names() %>%
  select(all_of(c("siteid", col_to_keep))) %>%
  st_drop_geometry()
slp_riv <- slope_df %>%
  select(siteid, log_species_nb, evenness, turnover, nestedness, hillebrand,
    appearance, disappearance, jaccard) %>%
left_join(riv, by = "siteid") %>%
left_join(select(filtered_dataset$location, siteid, ecoregion, main_bas, latitude, longitude), by = "siteid")

length(unique(slp_riv$main_bas))
#> [1] 309
slp_riv %>%
  ggplot(aes(y = jaccard, x = as.factor(ord_stra), color = ecoregion)) +
  geom_boxplot()


slp_riv %>%
  ggplot(aes(y = jaccard, x = log(dist_up_km), color = ecoregion)) +
  geom_point() +
  geom_smooth(method = "lm")
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 600 rows containing non-finite values (stat_smooth).
#> Warning: Removed 600 rows containing missing values (geom_point).


# slp_riv %>%
#   ggplot(aes(y = jaccard, x = as.factor(ord_flow), color = ecoregion)) +
#   geom_boxplot()

slp_riv %>%
  ggplot(aes(y = jaccard, x = tmp_dc_cyr /10, color = ecoregion)) +
  geom_point() +
  geom_smooth(method = "lm")
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 600 rows containing non-finite values (stat_smooth).
#> Removed 600 rows containing missing values (geom_point).

slp_riv %>%
  ggplot(aes(y = hillebrand, x = as.factor(ord_stra), color = ecoregion)) +
  geom_boxplot()


slp_riv %>%
  ggplot(aes(y = hillebrand, x = log(dist_up_km), color = ecoregion)) +
  geom_point() +
  geom_smooth(method = "lm")
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 600 rows containing non-finite values (stat_smooth).
#> Warning: Removed 600 rows containing missing values (geom_point).


# slp_riv %>%
#   ggplot(aes(y = hillebrand, x = as.factor(ord_flow), color = ecoregion)) +
#   geom_boxplot()

slp_riv %>%
  ggplot(aes(y = hillebrand, x = tmp_dc_cyr /10, color = ecoregion)) +
  geom_point() +
  geom_smooth(method = "lm")
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 600 rows containing non-finite values (stat_smooth).
#> Removed 600 rows containing missing values (geom_point).

slp_riv %>%
  ggplot(aes(y = hillebrand, x = log(dist_up_km), color = as.factor(main_bas))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) + 
  theme(legend.position = "none")
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 600 rows containing non-finite values (stat_smooth).
#> Warning: Removed 600 rows containing missing values (geom_point).


slp_riv %>%
  ggplot(aes(y = hillebrand, x = ord_stra, color = as.factor(main_bas))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme(legend.position = "none")
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 600 rows containing non-finite values (stat_smooth).
#> Removed 600 rows containing missing values (geom_point).


# slp_riv %>%
#   ggplot(aes(y = hillebrand, x = ord_flow, color = as.factor(main_bas))) +
#   geom_point() +
#   geom_smooth(method = "lm", se = FALSE) +
#   theme(legend.position = "none")
ti <- slp_riv %>%
 nest_by(main_bas) %>%
 filter(nrow(data) > 10 & sum(!is.na(data$dist_up_km)) > 10) 

tu <- ti %>%
  mutate(
    m = list(lm(jaccard ~ log(dist_up_km), data = data)),
    coef = list(broom::tidy(m))
  ) %>%
  unnest(coef) %>%
  filter(term == "log(dist_up_km)") 

tu %>%
  ggplot(aes(x = estimate)) +
  geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


summary(lm(jaccard ~ log(dist_up_km), slp_riv))
#> 
#> Call:
#> lm(formula = jaccard ~ log(dist_up_km), data = slp_riv)
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.100741 -0.008875  0.002875  0.012125  0.057293 
#> 
#> Coefficients:
#>                   Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)     -0.0139322  0.0007663 -18.181  < 2e-16 ***
#> log(dist_up_km) -0.0013307  0.0002148  -6.196 6.25e-10 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.0171 on 4914 degrees of freedom
#>   (600 observations deleted due to missingness)
#> Multiple R-squared:  0.007753,   Adjusted R-squared:  0.007551 
#> F-statistic: 38.39 on 1 and 4914 DF,  p-value: 6.25e-10

9.1 Human footprint

tar_load(riveratlas_site)
colnames(riveratlas_site)[
  str_detect(colnames(riveratlas_site),
    "hft_ix_")]
#> [1] "hft_ix_c93" "hft_ix_u93" "hft_ix_c09" "hft_ix_u09"


hftc <- riveratlas_site[,
        colnames(riveratlas_site) %in%
          c("siteid", "hft_ix_c93", "hft_ix_c09")
        ] %>%
        st_drop_geometry() %>%
        mutate(
          cat_hft_ix_c09 = cut(hft_ix_c09, 6),
          cat_hft_ix_c93 = cut(hft_ix_c93, 6),
          hft_ix_c9309_percent =
            (hft_ix_c09 - hft_ix_c93) / hft_ix_c93 * 100,
          hft_ix_c9309_ratio = hft_ix_c09 / hft_ix_c93,
          hft_ix_c9309_log_ratio = log(hft_ix_c9309_ratio),
          hft_ix_c9309_diff = hft_ix_c09 - hft_ix_c93,
          log_hft_ix_c9309_percent = log(hft_ix_c9309_percent),
          cat_hftc9309 = cut(hft_ix_c9309_percent, 6),
          cat_hft_ix_c9309_log_ratio = cut(hft_ix_c9309_log_ratio, 6)
        )
#> Warning in log(hft_ix_c9309_percent): NaNs produced
skimr::skim(hftc)
#> Warning in inline_hist(log_hft_ix_c9309_percent, 5): Variable contains Inf or
#> -Inf value(s) that were converted to NA.
Table 9.1: Data summary
Name hftc
Number of rows 4916
Number of columns 12
_______________________
Column type frequency:
character 1
factor 4
numeric 7
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
siteid 0 1 2 6 0 4916 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
cat_hft_ix_c09 0 1 FALSE 6 (77: 1391, (1.: 1247, (15: 871, (22: 629
cat_hft_ix_c93 0 1 FALSE 6 (77: 1254, (1.: 1077, (15: 958, (22: 694
cat_hftc9309 0 1 FALSE 3 (-7: 4913, (22: 2, (1.: 1, (51: 0
cat_hft_ix_c9309_log_ratio 0 1 FALSE 6 (-0: 2866, (-0: 1970, (-1: 59, (0.: 18

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
hft_ix_c93 0 1.00 182.44 112.87 2.00 85.00 165.50 268.25 456.00 <U+2587><U+2587><U+2586><U+2585><U+2582>
hft_ix_c09 0 1.00 169.08 111.71 2.00 77.00 142.00 250.00 456.00 <U+2587><U+2587><U+2585><U+2583><U+2582>
hft_ix_c9309_percent 0 1.00 -6.46 33.05 -76.92 -18.03 -4.10 0.00 1709.09 <U+2587><U+2581><U+2581><U+2581><U+2581>
hft_ix_c9309_ratio 0 1.00 0.94 0.33 0.23 0.82 0.96 1.00 18.09 <U+2587><U+2581><U+2581><U+2581><U+2581>
hft_ix_c9309_log_ratio 0 1.00 -0.09 0.22 -1.47 -0.20 -0.04 0.00 2.90 <U+2581><U+2587><U+2581><U+2581><U+2581>
hft_ix_c9309_diff 0 1.00 -13.37 28.21 -122.00 -29.00 -6.00 0.00 188.00 <U+2581><U+2587><U+2582><U+2581><U+2581>
log_hft_ix_c9309_percent 3089 0.37 -Inf NaN -Inf -Inf 0.58 2.13 7.44 <U+2583><U+2587><U+2586><U+2581><U+2581>
p_hft_diff <- map(c("hft_ix_c9309_percent", "log_hft_ix_c9309_percent"),
  ~hftc %>%
  ggplot(aes_string(x = .x)) +
  geom_histogram()
)
plot_grid(plotlist = p_hft_diff)
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#> Warning: Removed 3690 rows containing non-finite values (stat_bin).

hftc_biodiv <- analysis_dataset %>%
  left_join(hftc, by = "siteid")
p_jaccard_y_c9309_log_ratio <- map(
  c("cat_hft_ix_c93", "cat_hft_ix_c09", "cat_hft_ix_c9309_log_ratio"),
  ~hftc_biodiv %>%
    ggplot(aes(y = jaccard_dis, x = year_nb,
        color = !!sym(.x))) +
    geom_point() +
    geom_smooth()

) 
p_jaccard_y_c9309_log_ratio
#> [[1]]
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#> 
#> [[2]]
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#> 
#> [[3]]
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

p_jaccard_log_y_c9309_log_ratio <- map(
  c("cat_hft_ix_c93", "cat_hft_ix_c09", "cat_hft_ix_c9309_log_ratio"),
  ~hftc_biodiv %>%
    ggplot(aes(y = jaccard_dis, x = log(year_nb + 1),
        color = !!sym(.x))) +
    geom_point() +
    geom_smooth()

)
p_jaccard_log_y_c9309_log_ratio
#> [[1]]
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#> 
#> [[2]]
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#> 
#> [[3]]
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

  • Correlation:
library(easystats)
tar_load(modelling_data)
cor_data <- modelling_data %>%
  left_join(hftc, by = "siteid") %>%
  select(!starts_with("cat") & !ends_with("scaled") & !starts_with("log_hft")) %>%
  select(-siteid, -main_bas)

corrplot::corrplot(
  cor(as.matrix(na.omit(cor_data), method = c("spearman")))
  )

9.2 SpaMM model

knitr::opts_chunk$set(eval = FALSE)
tu <- ti %>%
  mutate(
    m = list(lm(appearance ~ ord_stra, data = data))
    ,
    coef = list(broom.mixed::tidy(m))
  ) %>%
  unnest(coef) %>%
  filter(term == "ord_stra") %>%
  ungroup()

stat_data <- slp_riv %>%
  mutate(
    appearance = scale(appearance),
    ord_stra = as.factor(ord_stra),
    main_bas = as.factor(main_bas),
    log_dist_up_km = log(dist_up_km)
  )

m0 <- spaMM::fitme(appearance ~ ord_stra, stat_data)
plot(m0)

m1 <- spaMM::fitme(appearance ~ ord_stra + (1 | main_bas),
  stat_data)

m2 <- spaMM::fitme(
  appearance ~ ord_stra + (1 + ord_stra | main_bas),
  stat_data
)

m2d <- spaMM::fitme(
  appearance ~ dist_up_km + (1 + dist_up_km | main_bas),
  stat_data
)
summary(m2d)

m2d <- spaMM::fitme(
  appearance ~ log_dist_up_km + (1 + log_dist_up_km | ecoregion/main_bas/siteid),
  stat_data
)
summary(m2d)
plot(m2d)


tm <- stat_data %>%
  na.omit() %>%
  mutate(
    pred_appearance = predict(m2d,
                              re.form = ~log_dist_up_km + (1 + log_dist_up_km | ecoregion))[,1]
  )
tm %>%
  ggplot(aes(x = log_dist_up_km, y = pred_appearance, color = ecoregion)) +
  geom_line()
tar_load(analysis_dataset)
formula <- paste0(
  "jaccard",
  " ~ ",
  "year", "+", "year:ord_stra", " + ", "(1 + year:ord_stra| ecoregion/main_bas/siteid)"
)
spaMM::fitme(formula = formula, data = analysis_dataset)

formula <- paste0(
  "jaccard",
  " ~ ",
  "dis_up_km + tmp_dc_cyr + (1 + dist_up_km + tmp_dc_cyr | ecoregion/main_bas/siteid)"
)
m3 <- spaMM::fitme(scale(appearance) ~ ord_stra + (1 | main_bas) +
  Matern(1 | longitude + latitude),
  na.omit(slp_riv), control.HLfit = list(algebra = "decorr"))
  • Disappearance:
tu <- ti %>%
  mutate(
    m = list(lm(disappearance ~ log(dist_up_km), data = data)),
    coef = list(broom::tidy(m))
  ) %>%
  unnest(coef) %>%
  filter(term == "log(dist_up_km)") %>%
  ungroup()

tu %>%
  ggplot(aes(x = estimate)) +
  geom_histogram()

summary(lm(disappearance ~ log(dist_up_km), slp_riv))
cor_slp_env <-
  slp_riv[, !colnames(slp_riv) %in% c("siteid", "ecoregion", "main_bas")] %>%
  na.omit() %>%
  cor(., method = "spearman")
corrplot::corrplot(
  cor_slp_env
)
rigal_trends_df %>%
  ggplot(aes(x = jaccard , y = hillebrand)) +
  geom_point() +
#  geom_smooth(method = "gam") +
  labs(x = "Jaccard trends (similarity, binary)", y = "Hillebrand trends
    (similarity, relative abundance)")

library(INLA)
a3 <- inla(scale(appearance) ~ ord_stra + f(main_bas, model = "iid"),
  family = "gaussian",
  control.predictor = list(link = 1, compute = TRUE),
  control.compute = list(cpo = TRUE, dic = TRUE, config = TRUE),
  data = slp_riv)

9.4 Reproducibility

Reproducibility receipt
## datetime
Sys.time()

## repository
if(requireNamespace('git2r', quietly = TRUE)) {
  git2r::repository()
} else {
  c(
    system2("git", args = c("log", "--name-status", "-1"), stdout = TRUE),
    system2("git", args = c("remote", "-v"), stdout = TRUE)
  )
}

## session info
sessionInfo()